Out of 30 samples, we selected 17 for this study. These are the normal tissue samples form the control, the UVA and the UVA+SFN treatment groups. normal tissue samples from the UVB_UA groups as well as tumor samples were excluded from this analysis. Additionally, one of the control samples at Week 2 (baseline) was removed after outlier analysis.
7,219 genes with zero counts in > 80% (> 13 out of 18) of samples were removed. 17,202 out of 24,421 genes were left.
[1] 7219
[1] 17202
Next, we noramized the counts. To convert number of hits to the relative abundane of genes in each sample, we used transcripts per kilobase million (TPM) normalization, which is as following for the j-th sample:
1. normilize for gene length: a[i, j] = 1,000*count[i, j]/gene[i, j] length(bp)
2. normalize for seq depth (i.e. total count): a(i, j)/sum(a[, j])
3. multiply by one million
A very good comparison of normalization techniques can be found at the following video:
RPKM, FPKM and TPM, clearly explained
After the normalization, each sample’s total is 1M:
02w_CON_0 02w_SFN_0 02w_SFN_1 02w_UVB_0 02w_UVB_1 15w_CON_0 15w_CON_1 15w_SFN_0 15w_SFN_1 15w_UVB_0 15w_UVB_1
1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
25w_CON_0 25w_CON_1 25w_SFN_0 25w_SFN_1 25w_UVB_0 25w_UVB_1
1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
Color Legend:
YELLOW: TMP > 10
RED: TMP > 100
# Separate top 100 abundant genes
tmp <- droplevels(tpm[Geneid %in% levels(tpm$Geneid)[(nrow(tpm) - 99):nrow(tpm)]])
tmp <- melt.data.table(data = tmp,
id.vars = 1:2,
measure.vars = 3:ncol(tmp),
variable.name = "Sample",
value.name = "TPM")
tmp$Week <- substr(x = tmp$Sample,
start = 1,
stop = 3)
tmp$Week <- factor(tmp$Week,
levels = unique(tmp$Week))
tmp$Treatment <- substr(x = tmp$Sample,
start = 5,
stop = 7)
tmp$Treatment <- factor(tmp$Treatment,
levels = c("CON",
"UVB",
"SFN"))
tmp$Replica <- substr(x = tmp$Sample,
start = 9,
stop = 9)
tmp$Replica <- factor(tmp$Replica,
levels = 0:1)
# Plot top 100 abundant genes
p2 <- ggplot(tmp,
aes(x = TPM,
y = Geneid,
fill = Treatment,
shape = Week)) +
# facet_wrap(~ Sex, nrow = 1) +
geom_point(size = 3,
alpha = 0.5) +
geom_vline(xintercept = 1,
linetype = "dashed")
ggplotly(p2)
tmp <- droplevels(tpm[Geneid %in% levels(tpm$Geneid)[1:100]])
tmp <- melt.data.table(data = tmp,
id.vars = 1:2,
measure.vars = 3:ncol(tmp),
variable.name = "Sample",
value.name = "TPM")
tmp$Week <- substr(x = tmp$Sample,
start = 1,
stop = 3)
tmp$Week <- factor(tmp$Week,
levels = unique(tmp$Week))
tmp$Treatment <- substr(x = tmp$Sample,
start = 5,
stop = 7)
tmp$Treatment <- factor(tmp$Treatment,
levels = c("CON",
"UVB",
"SFN"))
tmp$Replica <- substr(x = tmp$Sample,
start = 9,
stop = 9)
tmp$Replica <- factor(tmp$Replica,
levels = 0:1)
# Plot top 100 abundant genes
p3 <- ggplot(tmp,
aes(x = TPM,
y = Geneid,
fill = Treatment,
shape = Week)) +
# facet_wrap(~ Sex, nrow = 1) +
geom_point(size = 3,
alpha = 0.5) +
geom_vline(xintercept = 1,
linetype = "dashed")
ggplotly(p3)
dmeta <- data.table(Sample = colnames(dt1)[-c(1:2)])
dmeta$time <- substr(x = dmeta$Sample,
start = 1,
stop = 3)
dmeta$time <- factor(dmeta$time,
levels = c("02w",
"15w",
"25w"))
dmeta$Week <- factor(dmeta$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dmeta$trt <- substr(x = dmeta$Sample,
start = 5,
stop = 7)
dmeta$trt <- factor(dmeta$trt,
levels = c("CON",
"UVB",
"SFN"))
dmeta$Treatment <- factor(dmeta$trt,
levels = c("CON",
"UVB",
"SFN"),
labels = c("Negative Control",
"Positive Control (UVB)",
"Sulforaphane (SFN)"))
dmeta$Replica <- substr(x = dmeta$Sample,
start = 9,
stop = 9)
dmeta$Replica <- factor(dmeta$Replica,
levels = 0:1)
datatable(dmeta,
rownames = FALSE,
class = "cell-border stripe",
options = list(pageLength = nrow(dmeta)))
NOTE: the distributions are skewed. To make them symmetric, log transformation is often applied. However, there is an issue of zeros. In this instance, we added a small values lambda[i] equal to 1/10 of the smallest non-zero value of i-th gene.
dm.tpm <- as.matrix(tpm[, -c(1:2), with = FALSE])
rownames(dm.tpm) <- tpm$Geneid
# # Remove 02w_CON_1 sample and redo PCA
# dm.tpm <- dm.tpm[, colnames(dm.tpm) != "02w_CON_1"]
# dmeta <- dmeta[dmeta$Sample != "02w_CON_1", ]
# Add lambdas to all values, then take a log
dm.ltpm <- t(apply(X = dm.tpm,
MARGIN = 1,
FUN = function(a) {
lambda <- min(a[a > 0])/10
log(a + lambda)
}))
# PCA----
m1 <- prcomp(t(dm.ltpm),
center = TRUE,
scale. = TRUE)
s1 <- summary(m1)
s1
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
Standard deviation 66.5041 61.8206 45.2845 30.42909 28.24422 26.84136 25.01865 23.05989 22.08373 21.24391
Proportion of Variance 0.2571 0.2222 0.1192 0.05383 0.04637 0.04188 0.03639 0.03091 0.02835 0.02624
Cumulative Proportion 0.2571 0.4793 0.5985 0.65232 0.69869 0.74058 0.77696 0.80788 0.83623 0.86246
PC11 PC12 PC13 PC14 PC15 PC16 PC17
Standard deviation 20.87624 20.6980 20.28169 19.42403 19.14803 18.61200 2.085e-13
Proportion of Variance 0.02534 0.0249 0.02391 0.02193 0.02131 0.02014 0.000e+00
Cumulative Proportion 0.88780 0.9127 0.93662 0.95855 0.97986 1.00000 1.000e+00
imp <- data.table(PC = colnames(s1$importance),
Variance = 100*s1$importance[2, ],
Cumulative = 100*s1$importance[3, ])
imp$PC <- factor(imp$PC,
levels = imp$PC)
p1 <- ggplot(imp,
aes(x = PC,
y = Variance)) +
geom_bar(stat = "identity",
fill = "grey",
color = "black") +
geom_line(aes(y = rescale(Cumulative,
to = c(min(Cumulative)*max(imp$Variance)/100,
max(imp$Variance))),
group = rep(1, nrow(imp)))) +
geom_point(aes(y = rescale(Cumulative,
to = c(min(Cumulative)*max(imp$Variance)/100,
max(imp$Variance))))) +
scale_y_continuous("% Variance Explained",
breaks = seq(from = 0,
to = max(imp$Variance),
by = 5),
labels = paste(seq(from = 0,
to = max(imp$Variance),
by = 5),
"%",
sep = ""),
sec.axis = sec_axis(trans = ~.,
name = "% Cumulative Variance",
breaks = seq(from = 0,
to = max(imp$Variance),
length.out = 5),
labels = paste(seq(from = 0,
to = 100,
length.out = 5),
"%",
sep = ""))) +
scale_x_discrete("") +
theme(axis.text.x = element_text(angle = 90,
hjust = 1))
# Save for publication
tiff(filename = "tmp/pca_pareto.tiff",
height = 6,
width = 8,
units = 'in',
res = 600,
compression = "lzw+p")
print(p1)
graphics.off()
print(p1)
# Biplot while keep only the most important variables (Javier)----
# Select PC-s to pliot (PC1 & PC2)
choices <- c(1:3)
# Scores, i.e. points (df.u)
dt.scr <- data.table(m1$x[, choices])
# Add grouping variables
dt.scr$trt <- dmeta$trt
dt.scr$time <- dmeta$time
dt.scr$sample <- dmeta$Sample
# Loadings, i.e. arrows (df.v)
dt.rot <- as.data.frame(m1$rotation[, choices])
dt.rot$feat <- rownames(dt.rot)
dt.rot <- data.table(dt.rot)
# Axis labels
u.axis.labs <- paste(colnames(dt.rot)[choices],
sprintf('(%0.1f%% explained var.)',
100*m1$sdev[choices]^2/sum(m1$sdev^2)))
p1 <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC2,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[1]) +
scale_y_continuous(u.axis.labs[2]) +
theme(legend.position = "none")
ggplotly(p1)
p2 <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC3,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[1]) +
scale_y_continuous(u.axis.labs[3]) +
theme(legend.position = "none")
ggplotly(p2)
p3 <- ggplot(data = dt.scr,
aes(x = PC2,
y = PC3,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[2]) +
scale_y_continuous(u.axis.labs[3]) +
theme(legend.position = "none")
ggplotly(p3)
# Legend only
tmp <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC2,
color = trt,
shape = time)) +
geom_point() +
scale_color_discrete("Treatment") +
scale_shape_discrete("Week")
p4 <- as_ggplot(get_legend(tmp))
# Save for publication
tiff(filename = "tmp/pca.tiff",
height = 7,
width = 9,
units = 'in',
res = 600,
compression = "lzw+p")
grid.arrange(p1, p2, p3, p4,
nrow = 2)
graphics.off()
scatterplot3js(x = dt.scr$PC1,
y = dt.scr$PC2,
z = dt.scr$PC3,
color = as.numeric(dt.scr$trt),
renderer = "auto",
pch = dt.scr$sample,
size = 0.1)
Sources:
1. Analyzing RNA-seq data with DESeq2:Interactions
2. Bioconductor Question: DESeq2 time series analysis
We are testing a model with time*treatment interaction. The idea here is to find genes with significant interaction term. That would suggest that the gene expressiondifferences between the treatments depended on time. THere are several possible scenarios:
a. No difference between the negative control and the positive control groups at baseline, significant difference at the later time point. This will show the effect of the disease (UVB radiation, in this case).
b. Significant difference between the control groups at baseline, no difference at the later time point. Same as (a) above.
c. Differences between the positive control and the SFN-treated groups. Here, we are interested in the reversal of UVB effect. Again, the interaction term will need to be significant for the reasons described above.
# Relevel: make all comparisons with the positive control (UVB)
dmeta$trt <- factor(dmeta$trt,
levels = c("UVB",
"CON",
"SFN"))
dtm<- as.matrix(dt1[, dmeta$Sample,
with = FALSE])
rownames(dtm) <- dt1$Geneid
dds <- DESeqDataSetFromMatrix(countData = dtm,
colData = dmeta,
~ time + trt + time:trt)
# If all samples contain zeros, geometric means cannot be
# estimated. Change default 'type = "ratio"' to 'type = "poscounts"'.
# Type '?DESeq2::estimateSizeFactors' for more details.
dds <- estimateSizeFactors(object = dds,
type = "poscounts")
# Run DESeq----
dds <- DESeq(object = dds,
# test = "LRT",
# reduced = ~ time + trt,
fitType = "local",
sfType = "ratio",
parallel = FALSE)
using pre-existing size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
# NOTE (from DESeq help file, section Value):
# A DESeqDataSet object with results stored as metadata columns.
# These results should accessed by calling the results function.
# By default this will return the log2 fold changes and p-values
# for the last variable in the design formula.
# See results for how to access results for other variables.
# In this case, the last term is the interaction term trt:time
# NOTE:
# Likelihood ratio test (LRT) (chi-squared test) for GLM will only return
# the results for the difference between the full and the reduced model
resultsNames(dds)
[1] "Intercept" "time_15w_vs_02w" "time_25w_vs_02w" "trt_CON_vs_UVB" "trt_SFN_vs_UVB" "time15w.trtCON"
[7] "time25w.trtCON" "time15w.trtSFN" "time25w.trtSFN"
# Model matrix
mm1 <- model.matrix(~ time + trt + time:trt, dmeta)
mm1
(Intercept) time15w time25w trtCON trtSFN time15w:trtCON time25w:trtCON time15w:trtSFN time25w:trtSFN
1 1 0 0 1 0 0 0 0 0
2 1 0 0 0 1 0 0 0 0
3 1 0 0 0 1 0 0 0 0
4 1 0 0 0 0 0 0 0 0
5 1 0 0 0 0 0 0 0 0
6 1 1 0 1 0 1 0 0 0
7 1 1 0 1 0 1 0 0 0
8 1 1 0 0 1 0 0 1 0
9 1 1 0 0 1 0 0 1 0
10 1 1 0 0 0 0 0 0 0
11 1 1 0 0 0 0 0 0 0
12 1 0 1 1 0 0 1 0 0
13 1 0 1 1 0 0 1 0 0
14 1 0 1 0 1 0 0 0 1
15 1 0 1 0 1 0 0 0 1
16 1 0 1 0 0 0 0 0 0
17 1 0 1 0 0 0 0 0 0
attr(,"assign")
[1] 0 1 1 2 2 3 3 3 3
attr(,"contrasts")
attr(,"contrasts")$time
[1] "contr.treatment"
attr(,"contrasts")$trt
[1] "contr.treatment"
head(mcols(dds))
DataFrame with 6 rows and 50 columns
baseMean baseVar allZero dispGeneEst dispGeneIter dispFit
<numeric> <numeric> <logical> <numeric> <numeric> <numeric>
Xkr4 0.414423785139076 0.750734393874421 FALSE 1e-08 1 2.35686251255345
Mrpl15 497.506315418383 6139.21631388383 FALSE 0.00292023552394721 6 0.00975181583387631
Lypla1 1316.42450437205 94053.122870121 FALSE 0.00514177871417793 10 0.0074100485818535
Tcea1 362.833336721312 2447.08771392985 FALSE 1e-08 20 0.0123515065189161
Rgs20 412.785226796461 8337.26279018443 FALSE 0.0222228623148068 8 0.0111228088946145
Atp6v1h 1163.12136188358 26870.2895984056 FALSE 0.00473653527254895 9 0.00743062379729061
dispersion dispIter dispOutlier dispMAP Intercept time_15w_vs_02w
<numeric> <integer> <logical> <numeric> <numeric> <numeric>
Xkr4 6.43661011051539 8 FALSE 6.43661011051539 -2.359805612164 -0.228477588168501
Mrpl15 0.0060101698743299 8 FALSE 0.0060101698743299 9.06594448953328 -0.137408907813809
Lypla1 0.00604102606581283 9 FALSE 0.00604102606581283 10.7337301130648 -0.629677974788472
Tcea1 0.00715812241817593 7 FALSE 0.00715812241817593 8.78214921631808 -0.516217579095005
Rgs20 0.0168637514204584 11 FALSE 0.0168637514204584 8.98928399842352 -0.547987096260501
Atp6v1h 0.00580961463958366 9 FALSE 0.00580961463958366 10.4068496272689 -0.491695240290437
time_25w_vs_02w trt_CON_vs_UVB trt_SFN_vs_UVB time15w.trtCON time25w.trtCON
<numeric> <numeric> <numeric> <numeric> <numeric>
Xkr4 -0.165844507463528 0.0598562849180821 1.6582080718198 2.45478731530058 3.43262855563513
Mrpl15 -0.0412786898053219 -0.308591258014163 0.199168294921519 0.0156586240981802 -0.102536901707458
Lypla1 -0.599280178188303 -0.305684497430534 0.179718039995711 0.281344903276623 0.348189855674569
Tcea1 -0.446190172830842 -0.196562316500229 -0.0830935380769627 0.309506757416714 0.476511703155704
Rgs20 -0.45980987283847 -0.0685634893160301 0.113854717310148 -0.0460895727086707 -0.119888249480383
Atp6v1h -0.365919358337453 -0.17807000833384 0.0799789915431519 0.246974241692442 0.3213538709111
time15w.trtSFN time25w.trtSFN SE_Intercept SE_time_15w_vs_02w SE_time_25w_vs_02w
<numeric> <numeric> <numeric> <numeric> <numeric>
Xkr4 -1.67076473658365 -1.57787360996648 2.96284694407196 4.19009832838988 4.19009832833768
Mrpl15 -0.178149323759463 -0.0710688162246861 0.0911721029656416 0.128443851313393 0.12828481198956
Lypla1 -0.107101147581259 -0.0334225250935585 0.0832744171626043 0.118643311639213 0.118734269568226
Tcea1 0.271157924759699 0.104295727467573 0.0997712835011623 0.143038313778663 0.142999047037131
Rgs20 0.24522833856804 -0.0021489805803274 0.140429387999271 0.199941844408924 0.199839918784151
Atp6v1h 0.171814404854682 0.0421037362927887 0.082816070681427 0.117808586333953 0.117641366800501
SE_trt_CON_vs_UVB SE_trt_SFN_vs_UVB SE_time15w.trtCON SE_time25w.trtCON SE_time15w.trtSFN SE_time25w.trtSFN
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Xkr4 5.13171132899515 4.17888254775382 6.55361338849756 6.5053324829564 5.91776899533986 5.9177689953029
Mrpl15 0.161651924073278 0.128745553404475 0.208065870702812 0.207540172422408 0.181401275197653 0.181107411065465
Lypla1 0.145519737526484 0.117738852864755 0.188694868576427 0.188378486771387 0.16762877155602 0.167772361896891
Tcea1 0.175595965579231 0.142746566195654 0.228119616478045 0.226530264396902 0.202476358816243 0.203686983950883
Rgs20 0.244051721146729 0.198791901404911 0.317398591717306 0.316856976912824 0.281821816802482 0.282640507763004
Atp6v1h 0.144448031460997 0.117328387791251 0.187086661574303 0.186409051317023 0.16627170341325 0.16643031647693
WaldStatistic_Intercept WaldStatistic_time_15w_vs_02w WaldStatistic_time_25w_vs_02w
<numeric> <numeric> <numeric>
Xkr4 -0.796465580810876 -0.0545279776897975 -0.0395800991928805
Mrpl15 99.437702922678 -1.06979747499584 -0.321773787287314
Lypla1 128.895889983904 -5.30731961278428 -5.0472385130895
Tcea1 88.022814863515 -3.6089462009025 -3.12023179227889
Rgs20 64.012840378327 -2.74073242587354 -2.30089101134551
Atp6v1h 125.662199880281 -4.17367914845039 -3.11046503699663
WaldStatistic_trt_CON_vs_UVB WaldStatistic_trt_SFN_vs_UVB WaldStatistic_time15w.trtCON
<numeric> <numeric> <numeric>
Xkr4 0.0116640007749233 0.396806575171895 0.374570053157096
Mrpl15 -1.90898598815487 1.54699164091361 0.075258013461256
Lypla1 -2.10063942270993 1.52641235771297 1.49100452703975
Tcea1 -1.11940109701175 -0.582105337392646 1.35677396882921
Rgs20 -0.280938355992205 0.572733177284935 -0.145210388172487
Atp6v1h -1.23276175197943 0.681667864434048 1.32010609208693
WaldStatistic_time25w.trtCON WaldStatistic_time15w.trtSFN WaldStatistic_time25w.trtSFN WaldPvalue_Intercept
<numeric> <numeric> <numeric> <numeric>
Xkr4 0.527663814974626 -0.282330171708181 -0.266633187476376 0.425761473479907
Mrpl15 -0.494058092516009 -0.982073161091917 -0.39241252363216 0
Lypla1 1.84835254620728 -0.638918644974184 -0.199213533836397 0
Tcea1 2.10352336110292 1.33920782823731 0.512039235127185 0
Rgs20 -0.378367081099077 0.870153848805504 -0.00760322926581116 0
Atp6v1h 1.72391774240929 1.03333520573646 0.25298117064282 0
WaldPvalue_time_15w_vs_02w WaldPvalue_time_25w_vs_02w WaldPvalue_trt_CON_vs_UVB WaldPvalue_trt_SFN_vs_UVB
<numeric> <numeric> <numeric> <numeric>
Xkr4 0.956514518769316 0.968427893548228 0.990693684883985 0.691510101678589
Mrpl15 0.284710479117951 0.747624073744977 0.0562638992384959 0.121865261334255
Lypla1 1.11249010699918e-07 4.48241630561262e-07 0.0356726306570048 0.126907201963519
Tcea1 0.000307443353893709 0.00180708779608543 0.262969063182376 0.560495730233128
Rgs20 0.00613024072810697 0.0213977923150809 0.778757681035553 0.566825369916453
Atp6v1h 2.99719777251695e-05 0.00186793007683514 0.217664665159797 0.49544899193699
WaldPvalue_time15w.trtCON WaldPvalue_time25w.trtCON WaldPvalue_time15w.trtSFN WaldPvalue_time25w.trtSFN
<numeric> <numeric> <numeric> <numeric>
Xkr4 0.707980248270468 0.597732692383313 0.777690352523301 0.789751600479958
Mrpl15 0.940009427107256 0.621265153192805 0.326063806588436 0.694753434090283
Lypla1 0.135960306359441 0.0645513587969476 0.522875857997676 0.842095713129838
Tcea1 0.174853041105187 0.0354200453908308 0.180503024691872 0.608623550427472
Rgs20 0.88454476429304 0.705157919128452 0.384216333175638 0.993933559205863
Atp6v1h 0.18679959906884 0.0847226938662104 0.301447057198772 0.800282763673559
betaConv betaIter deviance maxCooks
<logical> <numeric> <numeric> <logical>
Xkr4 TRUE 13 25.9033824686373 NA
Mrpl15 TRUE 2 165.306361397833 NA
Lypla1 TRUE 2 196.962147294101 NA
Tcea1 TRUE 2 157.679951768679 NA
Rgs20 TRUE 3 178.614721232345 NA
Atp6v1h TRUE 2 192.597108944526 NA
# res_con_uvb_week2 <- results(dds,
# contrast = c(0,0,0,1,0,0,0,0,0),
# alpha = 0.1)
# SAME AS:
res_con_uvb_week2 <- results(dds,
name = "trt_CON_vs_UVB",
alpha = 0.1)
res_con_uvb_week2 <- res_con_uvb_week2[order(res_con_uvb_week2$padj,
decreasing = FALSE),]
summary(res_con_uvb_week2)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1546, 9%
LFC < 0 (down) : 1537, 8.9%
outliers [1] : 0, 0%
low counts [2] : 2335, 14%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.05?
sum(res_con_uvb_week2$padj < 0.1,
na.rm = TRUE)
[1] 3083
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w2_con_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
plotMA(res_con_uvb_week2,
main = "Control vs. UVB at Week 2",
alpha = 0.8)
graphics.off()
plotMA(res_con_uvb_week2,
main = "Control vs. UVB at Week 2",
alpha = 0.8)
# res_sfn_uvb_week2 <- results(dds,
# contrast = c(0,0,0,0,1,0,0,0,0),
# alpha = 0.1)
# SAME AS;
res_sfn_uvb_week2 <- results(dds,
name = "trt_SFN_vs_UVB",
alpha = 0.1)
res_sfn_uvb_week2 <- res_sfn_uvb_week2[order(res_sfn_uvb_week2$padj,
decreasing = FALSE),]
summary(res_sfn_uvb_week2)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 26, 0.15%
LFC < 0 (down) : 35, 0.2%
outliers [1] : 0, 0%
low counts [2] : 3669, 21%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.05?
sum(res_sfn_uvb_week2$padj < 0.1,
na.rm = TRUE)
[1] 61
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w2_sfn_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
print(plotMA(res_sfn_uvb_week2,
main = "UVB+SFN vs UVB at Week 2",
alpha = 0.8))
NULL
graphics.off()
print(plotMA(res_sfn_uvb_week2,
main = "UVB+SFN vs UVB at Week 2",
alpha = 0.8))
NULL
lgene.w2.con <- unique(res_con_uvb_week2@rownames[res_con_uvb_week2$padj < 0.1])
lgene.w2.sfn <- unique(res_sfn_uvb_week2@rownames[res_sfn_uvb_week2$padj < 0.1])
lgene.w2 <- lgene.w2.con[lgene.w2.con %in% lgene.w2.sfn]
lgene.w2 <- lgene.w2 [!is.na(lgene.w2 )]
lgene.w2
[1] "Utrn" "Stom" "Tesc" "Cited4" "Cdhr1" "Slc7a11" "Mki67" "Cyp26b1" "Smc2" "Mad2l1" "Slc4a7"
[12] "Ankrd23" "Ifitm3" "Etv3" "Pla2g4d" "Fetub" "Kif11" "Ccl6" "Has3" "Il19" "A4galt" "Otud1"
[23] "Msn" "Nqo1" "Dbf4" "Cblb" "Tbc1d24" "Elmo2" "Cd163" "Esd" "Rfx2" "Gsta1" "Slurp1"
[34] "Arntl2" "Vldlr" "Tmem173" "Gpx2" "Slfn9" "Adh7" "Sprr2i" "Bcl2l15"
Plot of DESeq-normalizedcounts of genes significant in both comparisons at Week 2:
# Get the DESeq-normalize counts
dp1 <- list()
for (i in 1:length(lgene.w2)) {
out <- plotCounts(dds,
gene = lgene.w2[[i]],
intgroup = c("trt",
"time"),
returnData = TRUE)
dp1[[i]] <- data.table(Geneid = lgene.w2[[i]],
Sample = rownames(out),
out)
}
dp1 <- rbindlist(dp1)
dp1$trt <- factor(dp1$trt,
levels = c("CON",
"UVB",
"SFN"))
dp1$time <- factor(dp1$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dp1$Geneid <- factor(dp1$Geneid,
levels = lgene.w2)
dp1[, mu := mean(count,
na.rm = TRUE),
by = c("Geneid",
"trt",
"time")]
dmu <- unique(dp1[, -c("Sample",
"count")])
datatable(head(dmu),
rownames = FALSE,
class = "cell-border stripe") %>%
formatRound(columns = 4,
digits = 2)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$up.dn]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$up.dn]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$dn.up]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$dn.up]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
In many of these genes, UVB+SFN moved closer to UVB over time.
up.dn.w2 <- unique(as.character(dmu.w2$Geneid[dmu.w2$up.dn]))
dn.up.w2 <- unique(as.character(dmu.w2$Geneid[dmu.w2$dn.up]))
ll <- unique(c(up.dn.w2,
dn.up.w2))
# 36 genes
con_uvb_week2 <- data.table(Geneid = res_con_uvb_week2@rownames,
log2FoldChange = res_con_uvb_week2@listData$log2FoldChange)
sfn_uvb_week2 <- data.table(Geneid = res_sfn_uvb_week2@rownames,
log2FoldChange = -res_sfn_uvb_week2@listData$log2FoldChange)
t1 <- merge(con_uvb_week2[con_uvb_week2$Geneid %in% ll, ],
sfn_uvb_week2[sfn_uvb_week2$Geneid %in% ll, ],
by = "Geneid")
colnames(t1)[2:3] <- c("Control vs. UVB",
"UVB vs. SFN+UVB")
t1 <- t1[order(t1$`Control vs. UVB`,
decreasing = TRUE), ]
write.csv(t1,
file = "tmp/w2_sign_changes.csv",
row.names = FALSE)
ll <- melt.data.table(data = t1,
id.vars = 1,
measure.vars = 2:3,
variable.name = "Comparison",
value.name = "Gene Expression Diff")
ll$Comparison <- factor(ll$Comparison,
levels = c("Control vs. UVB",
"UVB vs. SFN+UVB"))
lvls <- ll[ll$Comparison == "Control vs. UVB", ]
ll$Geneid <- factor(ll$Geneid,
levels = lvls$Geneid[order(lvls$`Gene Expression Diff`)])
# Add dendrogram----
dt.dndr <- data.frame(t1[Geneid %in% levels(ll$Geneid), ])
rownames(dt.dndr) <- dt.dndr$Gene
dt.dndr <- dt.dndr[, -1]
# Compute distances between genes----
sampleDists <- dist(dt.dndr)
# Make dendrogram data----
dhc <- as.dendrogram(hclust(d = sampleDists),
horiz = TRUE)
ddata <- dendro_data(dhc,
type = "rectangle")
# Segment data----
dtp1 <- segment(ddata)
# Hitmap data----
dtp2 <- ll
dtp2$Geneid <- factor(dtp2$Geneid,
levels = ddata$labels$label)
offset.size <- 4
p1 <- ggplot(data = dtp2) +
coord_polar("y",
start = -0.3,
direction = -1) +
geom_tile(aes(x = as.numeric(Comparison),
y = Geneid,
fill = `Gene Expression Diff`),
color = "white") +
geom_text(data = dtp2[Comparison == "Control vs. UVB", ],
aes(x = rep(1.75,
nlevels(Geneid)),
y = Geneid,
angle = 90 + seq(from = 30,
to = 330,
length.out = nlevels(Geneid))[as.numeric(Geneid)] +
offset.size,
label = unique(Geneid)),
hjust = 0) +
geom_text(data = dtp2[Geneid == levels(dtp2$Geneid)[1], ],
aes(x = 1:nlevels(Comparison),
y = rep(-offset.size,
nlevels(Comparison)),
angle = 0,
label = levels(Comparison)),
hjust = 1,
size = 5) +
scale_fill_gradient2(low = "red",
high = "green",
mid = "grey",
midpoint = 0,
name = "") +
scale_y_discrete("",
expand = c(0, 0)) +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 15),
legend.direction = "horizontal",
legend.key.width = unit(1, "in"),
legend.key.height = unit(0.3, "in")) +
geom_segment(data = dtp1,
aes(x = -sqrt(y) + 0.5,
y = x,
xend = -sqrt(yend) + 0.5,
yend = xend),
size = 1)
tiff(filename = "tmp/skin_ubv_w2_sfn_hitmap_with_phylo.tiff",
height = 8,
width = 8,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
# 1. Ctrl vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 1546, 9%
# LFC < 0 (down) : 1537, 8.9%
# 23 genes down-up-down
# 2. SFN+UVB vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 26, 0.15%
# LFC < 0 (down) : 35, 0.2%
# 13 gens up-down-up
p1 <- ggplot() +
geom_circle(aes(x0 = c(1, 2, 1, 2),
y0 = c(1, 1, 4, 4),
r = rep(1, 4),
color = factor(c(2, 1, 1, 2))),
size = 2) +
geom_text(aes(x = rep(c(0.5, 1.5, 2.5), 2),
y = rep(c(1, 4), each = 3),
label = format(c(26, 13, 35, 1546, 23, 1537),
big.mark = ","))) +
scale_color_manual(values = c("green", "red")) +
theme_void() +
theme(legend.position = "none")
tiff(filename = "tmp/skin_ubv_sfn_w2_venn.tiff",
height = 6,
width = 4,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
res_con_uvb_week15 <- results(dds,
contrast = c(0,0,0,1,0,1,0,0,0),
alpha = 0.1)
res_con_uvb_week15 <- res_con_uvb_week15[order(res_con_uvb_week15$padj,
decreasing = FALSE),]
summary(res_con_uvb_week15)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1513, 8.8%
LFC < 0 (down) : 1463, 8.5%
outliers [1] : 0, 0%
low counts [2] : 2668, 16%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 1513, 8.8%
# LFC < 0 (down) : 1463, 8.5%
# outliers [1] : 0, 0%
# low counts [2] : 2668, 16%
# (mean count < 2)
# How many adjusted p-values were less than 0.01?
sum(res_con_uvb_week15$padj < 0.1,
na.rm = TRUE)
[1] 2976
# 2976
# NOT THE SAME AS?!!!:
res_con_uvb_week15.1 <- results(dds,
contrast = list("trt_CON_vs_UVB",
"time15w.trtCON"),
alpha = 0.1)
res_con_uvb_week15.1 <- res_con_uvb_week15.1[order(res_con_uvb_week15.1$padj,
decreasing = FALSE),]
summary(res_con_uvb_week15.1)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 469, 2.7%
LFC < 0 (down) : 455, 2.6%
outliers [1] : 0, 0%
low counts [2] : 4002, 23%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 469, 2.7%
# LFC < 0 (down) : 455, 2.6%
# outliers [1] : 0, 0%
# low counts [2] : 4002, 23%
# (mean count < 6)
# How many adjusted p-values were less than 0.1?
sum(res_con_uvb_week15.1$padj < 0.1,
na.rm = TRUE)
[1] 924
# 924
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w15_con_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
plotMA(res_con_uvb_week15,
main = "Control vs. UVB at Week 15",
alpha = 0.8)
graphics.off()
plotMA(res_con_uvb_week15,
main = "Control vs. UVB at Week 15",
alpha = 0.8)
res_sfn_uvb_week15 <- results(dds,
contrast = c(0,0,0,0,1,0,0,1,0),
alpha = 0.1)
res_sfn_uvb_week15 <- res_sfn_uvb_week15[order(res_sfn_uvb_week15$padj,
decreasing = FALSE),]
summary(res_sfn_uvb_week15)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 20, 0.12%
LFC < 0 (down) : 10, 0.058%
outliers [1] : 0, 0%
low counts [2] : 7004, 41%
(mean count < 53)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 20, 0.12%
# LFC < 0 (down) : 10, 0.058%
# outliers [1] : 0, 0%
# low counts [2] : 7004, 41%
# (mean count < 53)
# How many adjusted p-values were less than 0.05?
sum(res_sfn_uvb_week15$padj < 0.1,
na.rm = TRUE)
[1] 30
# 30
# NOT THE SAME AS!!!:
res_sfn_uvb_week15.1 <- results(dds,
contrast = list("trt_SFN_vs_UVB",
"time15w.trtSFN"),
alpha = 0.1)
res_sfn_uvb_week15.1 <- res_sfn_uvb_week15.1[order(res_sfn_uvb_week15.1$padj,
decreasing = FALSE),]
summary(res_sfn_uvb_week15.1)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 14, 0.081%
LFC < 0 (down) : 24, 0.14%
outliers [1] : 0, 0%
low counts [2] : 3335, 19%
(mean count < 4)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 14, 0.081%
# LFC < 0 (down) : 24, 0.14%
# outliers [1] : 0, 0%
# low counts [2] : 3335, 19%
# (mean count < 4)
# How many adjusted p-values were less than 0.1?
sum(res_sfn_uvb_week15.1$padj < 0.1,
na.rm = TRUE)
[1] 38
# 38
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w2_sfn_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
print(plotMA(res_sfn_uvb_week15,
main = "UVB+SFN vs UVB at Week 15",
alpha = 0.8))
NULL
graphics.off()
print(plotMA(res_sfn_uvb_week15,
main = "UVB+SFN vs UVB at Week 15",
alpha = 0.8))
NULL
lgene.w15.con <- unique(res_con_uvb_week15@rownames[res_con_uvb_week15$padj < 0.1])
lgene.w15.sfn <- unique(res_sfn_uvb_week15@rownames[res_sfn_uvb_week15$padj < 0.1])
lgene.w15 <- lgene.w15.con[lgene.w15.con %in% lgene.w15.sfn]
lgene.w15 <- lgene.w15 [!is.na(lgene.w15 )]
length(unique(lgene.w15))
[1] 15
Plot of DESeq-normalizedcounts of genes significant in both comparisons at Week 15:
# Get the DESeq-normalize counts
dp1 <- list()
for (i in 1:length(lgene.w15)) {
out <- plotCounts(dds,
gene = lgene.w15[[i]],
intgroup = c("trt",
"time"),
returnData = TRUE)
dp1[[i]] <- data.table(Geneid = lgene.w15[[i]],
Sample = rownames(out),
out)
}
dp1 <- rbindlist(dp1)
dp1$trt <- factor(dp1$trt,
levels = c("CON",
"UVB",
"SFN"))
dp1$time <- factor(dp1$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dp1$Geneid <- factor(dp1$Geneid,
levels = lgene.w15)
dp1[, mu := mean(count,
na.rm = TRUE),
by = c("Geneid",
"trt",
"time")]
dmu <- unique(dp1[, -c("Sample",
"count")])
datatable(head(dmu),
rownames = FALSE,
class = "cell-border stripe") %>%
formatRound(columns = 4,
digits = 2)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w15$Geneid[dmu.w15$up.dn]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w15$Geneid[dmu.w15$up.dn]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w15$Geneid[dmu.w15$dn.up]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w15$Geneid[dmu.w15$dn.up]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
up.dn.w15 <- unique(as.character(dmu.w15$Geneid[dmu.w15$up.dn]))
dn.up.w15 <- unique(as.character(dmu.w15$Geneid[dmu.w15$dn.up]))
ll <- unique(c(up.dn.w15,
dn.up.w15))
# 16 genes
con_uvb_week15 <- data.table(Geneid = res_con_uvb_week15@rownames,
log2FoldChange = res_con_uvb_week15@listData$log2FoldChange)
sfn_uvb_week15 <- data.table(Geneid = res_sfn_uvb_week15@rownames,
log2FoldChange = -res_sfn_uvb_week15@listData$log2FoldChange)
t1 <- merge(con_uvb_week15[con_uvb_week15$Geneid %in% ll, ],
sfn_uvb_week15[sfn_uvb_week15$Geneid %in% ll, ],
by = "Geneid")
colnames(t1)[2:3] <- c("Control vs. UVB",
"UVB vs. SFN+UVB")
t1 <- t1[order(t1$`Control vs. UVB`,
decreasing = TRUE), ]
write.csv(t1,
file = "tmp/w15_sign_changes.csv",
row.names = FALSE)
ll <- melt.data.table(data = t1,
id.vars = 1,
measure.vars = 2:3,
variable.name = "Comparison",
value.name = "Gene Expression Diff")
ll$Comparison <- factor(ll$Comparison,
levels = c("Control vs. UVB",
"UVB vs. SFN+UVB"))
lvls <- ll[ll$Comparison == "Control vs. UVB", ]
ll$Geneid <- factor(ll$Geneid,
levels = lvls$Geneid[order(lvls$`Gene Expression Diff`)])
# Add dendrogram----
dt.dndr <- data.frame(t1[Geneid %in% levels(ll$Geneid), ])
rownames(dt.dndr) <- dt.dndr$Gene
dt.dndr <- dt.dndr[, -1]
# Compute distances between genes----
sampleDists <- dist(dt.dndr)
# Make dendrogram data----
dhc <- as.dendrogram(hclust(d = sampleDists),
horiz = TRUE)
ddata <- dendro_data(dhc,
type = "rectangle")
# Segment data----
dtp1 <- segment(ddata)
# Hitmap data----
dtp2 <- ll
dtp2$Geneid <- factor(dtp2$Geneid,
levels = ddata$labels$label)
offset.size <- 4
p1 <- ggplot(data = dtp2) +
coord_polar("y",
start = -0.3,
direction = -1) +
geom_tile(aes(x = as.numeric(Comparison),
y = Geneid,
fill = `Gene Expression Diff`),
color = "white") +
geom_text(data = dtp2[Comparison == "Control vs. UVB", ],
aes(x = rep(1.75,
nlevels(Geneid)),
y = Geneid,
angle = 90 + seq(from = 90,
to = 330,
length.out = nlevels(Geneid))[as.numeric(Geneid)] +
offset.size,
label = unique(Geneid)),
hjust = 0) +
geom_text(data = dtp2[Geneid == levels(dtp2$Geneid)[1], ],
aes(x = 1:nlevels(Comparison),
y = rep(-offset.size,
nlevels(Comparison)),
angle = 0,
label = levels(Comparison)),
hjust = 1,
size = 5) +
scale_fill_gradient2(low = "red",
high = "green",
mid = "grey",
midpoint = 0,
name = "") +
scale_y_discrete("",
expand = c(0, 0)) +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 15),
legend.direction = "horizontal",
legend.key.width = unit(1, "in"),
legend.key.height = unit(0.3, "in")) +
geom_segment(data = dtp1,
aes(x = -sqrt(y) + 0.5,
y = x,
xend = -sqrt(yend) + 0.5,
yend = xend),
size = 1)
tiff(filename = "tmp/skin_ubv_w15_sfn_hitmap_with_phylo.tiff",
height = 8,
width = 8,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
# out of 17202 with nonzero total read count
# 1. Ctrl vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 1513, 8.8%
# LFC < 0 (down) : 1463, 8.5%
# 2 genes down-up-down
# 2. SFN+UVB vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 20, 0.12%
# LFC < 0 (down) : 10, 0.058%
# 9 gens up-down-up
p1 <- ggplot() +
geom_circle(aes(x0 = c(1, 2, 1, 2),
y0 = c(1, 1, 4, 4),
r = rep(1, 4),
color = factor(c(2, 1, 1, 2))),
size = 2) +
geom_text(aes(x = rep(c(0.5, 1.5, 2.5), 2),
y = rep(c(1, 4), each = 3),
label = format(c(20, 9, 10, 1513, 2, 1463),
big.mark = ","))) +
scale_color_manual(values = c("green", "red")) +
theme_void() +
theme(legend.position = "none")
tiff(filename = "tmp/skin_ubv_sfn_w2_venn.tiff",
height = 6,
width = 4,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
res_con_uvb_week25 <- results(dds,
contrast = c(0,0,0,1,0,0,1,0,0),
alpha = 0.1)
res_con_uvb_week25 <- res_con_uvb_week25[order(res_con_uvb_week25$padj,
decreasing = FALSE),]
summary(res_con_uvb_week25)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 3389, 20%
LFC < 0 (down) : 2917, 17%
outliers [1] : 0, 0%
low counts [2] : 2335, 14%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 3389, 20%
# LFC < 0 (down) : 2917, 17%
# outliers [1] : 0, 0%
# low counts [2] : 2335, 14%
# (mean count < 2)
# How many adjusted p-values were less than 0.1?
sum(res_con_uvb_week25$padj < 0.1,
na.rm = TRUE)
[1] 6306
# 6306
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w15_con_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
plotMA(res_con_uvb_week25,
main = "Control vs. UVB at Week 25",
alpha = 0.8)
graphics.off()
plotMA(res_con_uvb_week25,
main = "Control vs. UVB at Week 25",
alpha = 0.8)
res_sfn_uvb_week25 <- results(dds,
contrast = c(0,0,0,0,1,0,0,0,1),
alpha = 0.1)
res_sfn_uvb_week25 <- res_sfn_uvb_week25[order(res_sfn_uvb_week25$padj,
decreasing = FALSE),]
summary(res_sfn_uvb_week25)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 3, 0.017%
LFC < 0 (down) : 8, 0.047%
outliers [1] : 0, 0%
low counts [2] : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# out of 17202 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 3, 0.017%
# LFC < 0 (down) : 8, 0.047%
# outliers [1] : 0, 0%
# low counts [2] : 0, 0%
# (mean count < 0)
# How many adjusted p-values were less than 0.05?
sum(res_sfn_uvb_week25$padj < 0.1,
na.rm = TRUE)
[1] 11
# 30
# MA plot
# Save for publication
tiff(filename = "tmp/ma_w2_sfn_uvb.tiff",
height = 6,
width = 7,
units = 'in',
res = 600,
compression = "lzw+p")
print(plotMA(res_sfn_uvb_week25,
main = "UVB+SFN vs UVB at Week 25",
alpha = 0.8))
NULL
graphics.off()
print(plotMA(res_sfn_uvb_week25,
main = "UVB+SFN vs UVB at Week 25",
alpha = 0.8))
NULL
lgene.w25.con <- unique(res_con_uvb_week25@rownames[res_con_uvb_week25$padj < 0.1])
lgene.w25.sfn <- unique(res_sfn_uvb_week25@rownames[res_sfn_uvb_week25$padj < 0.1])
lgene.w25 <- lgene.w25.con[lgene.w25.con %in% lgene.w25.sfn]
lgene.w25 <- lgene.w25 [!is.na(lgene.w25 )]
length(unique(lgene.w25))
[1] 8
Plot of DESeq-normalizedcounts of genes significant in both comparisons at Week 25:
# Get the DESeq-normalize counts
dp1 <- list()
for (i in 1:length(lgene.w25)) {
out <- plotCounts(dds,
gene = lgene.w25[[i]],
intgroup = c("trt",
"time"),
returnData = TRUE)
dp1[[i]] <- data.table(Geneid = lgene.w25[[i]],
Sample = rownames(out),
out)
}
dp1 <- rbindlist(dp1)
dp1$trt <- factor(dp1$trt,
levels = c("CON",
"UVB",
"SFN"))
dp1$time <- factor(dp1$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dp1$Geneid <- factor(dp1$Geneid,
levels = lgene.w25)
dp1[, mu := mean(count,
na.rm = TRUE),
by = c("Geneid",
"trt",
"time")]
dmu <- unique(dp1[, -c("Sample",
"count")])
datatable(head(dmu),
rownames = FALSE,
class = "cell-border stripe") %>%
formatRound(columns = 4,
digits = 2)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w25$Geneid[dmu.w25$up.dn]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w25$Geneid[dmu.w25$up.dn]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w25$Geneid[dmu.w25$dn.up]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w25$Geneid[dmu.w25$dn.up]), ]
p1 <- ggplot(dp1.tmp,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu.tmp,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
up.dn.w25 <- unique(as.character(dmu.w25$Geneid[dmu.w25$up.dn]))
dn.up.w25 <- unique(as.character(dmu.w25$Geneid[dmu.w25$dn.up]))
ll <- unique(c(up.dn.w25,
dn.up.w25))
# 8 genes
con_uvb_week25 <- data.table(Geneid = res_con_uvb_week25@rownames,
log2FoldChange = res_con_uvb_week25@listData$log2FoldChange)
sfn_uvb_week25 <- data.table(Geneid = res_sfn_uvb_week25@rownames,
log2FoldChange = -res_sfn_uvb_week25@listData$log2FoldChange)
t1 <- merge(con_uvb_week25[con_uvb_week25$Geneid %in% ll, ],
sfn_uvb_week25[sfn_uvb_week25$Geneid %in% ll, ],
by = "Geneid")
colnames(t1)[2:3] <- c("Control vs. UVB",
"UVB vs. SFN+UVB")
t1 <- t1[order(t1$`Control vs. UVB`,
decreasing = TRUE), ]
write.csv(t1,
file = "tmp/w25_sign_changes.csv",
row.names = FALSE)
ll <- melt.data.table(data = t1,
id.vars = 1,
measure.vars = 2:3,
variable.name = "Comparison",
value.name = "Gene Expression Diff")
ll$Comparison <- factor(ll$Comparison,
levels = c("Control vs. UVB",
"UVB vs. SFN+UVB"))
lvls <- ll[ll$Comparison == "Control vs. UVB", ]
ll$Geneid <- factor(ll$Geneid,
levels = lvls$Geneid[order(lvls$`Gene Expression Diff`)])
# Add dendrogram----
dt.dndr <- data.frame(t1[Geneid %in% levels(ll$Geneid), ])
rownames(dt.dndr) <- dt.dndr$Gene
dt.dndr <- dt.dndr[, -1]
# Compute distances between genes----
sampleDists <- dist(dt.dndr)
# Make dendrogram data----
dhc <- as.dendrogram(hclust(d = sampleDists),
horiz = TRUE)
ddata <- dendro_data(dhc,
type = "rectangle")
# Segment data----
dtp1 <- segment(ddata)
# Hitmap data----
dtp2 <- ll
dtp2$Geneid <- factor(dtp2$Geneid,
levels = ddata$labels$label)
offset.size <- 4
p1 <- ggplot(data = dtp2) +
coord_polar("y",
start = -0.3,
direction = -1) +
geom_tile(aes(x = as.numeric(Comparison),
y = Geneid,
fill = `Gene Expression Diff`),
color = "white") +
geom_text(data = dtp2[Comparison == "Control vs. UVB", ],
aes(x = rep(1.75,
nlevels(Geneid)),
y = Geneid,
angle = 90 + seq(from = 120,
to = 330,
length.out = nlevels(Geneid))[as.numeric(Geneid)] +
offset.size,
label = unique(Geneid)),
hjust = 0) +
geom_text(data = dtp2[Geneid == levels(dtp2$Geneid)[1], ],
aes(x = 1:nlevels(Comparison),
y = rep(-offset.size,
nlevels(Comparison)),
angle = 0,
label = levels(Comparison)),
hjust = 1,
size = 5) +
scale_fill_gradient2(low = "red",
high = "green",
mid = "grey",
midpoint = 0,
name = "") +
scale_y_discrete("",
expand = c(0, 0)) +
theme(plot.title = element_text(hjust = 0.5),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 15),
legend.direction = "horizontal",
legend.key.width = unit(1, "in"),
legend.key.height = unit(0.3, "in")) +
geom_segment(data = dtp1,
aes(x = -sqrt(y) + 0.5,
y = x,
xend = -sqrt(yend) + 0.5,
yend = xend),
size = 1)
tiff(filename = "tmp/skin_ubv_w25_sfn_hitmap_with_phylo.tiff",
height = 8,
width = 8,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
# out of 17202 with nonzero total read count
# 1. Ctrl vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 3389, 20%
# LFC < 0 (down) : 2917, 17%
# 6 genes down-up-down
# 2. SFN+UVB vs. UVB
# adjusted p-value < 0.1
# LFC > 0 (up) : 3, 0.017%
# LFC < 0 (down) : 8, 0.047%
# 2 gens up-down-up
p1 <- ggplot() +
geom_circle(aes(x0 = c(1, 2, 1, 2),
y0 = c(1, 1, 4, 4),
r = rep(1, 4),
color = factor(c(2, 1, 1, 2))),
size = 2) +
geom_text(aes(x = rep(c(0.5, 1.5, 2.5), 2),
y = rep(c(1, 4), each = 3),
label = format(c(3, 2, 8, 3389, 6, 2917),
big.mark = ","))) +
scale_color_manual(values = c("green", "red")) +
theme_void() +
theme(legend.position = "none")
tiff(filename = "tmp/skin_ubv_sfn_w2_venn.tiff",
height = 6,
width = 4,
units = 'in',
res = 600,
compression = "lzw+p")
plot(p1)
graphics.off()
print(p1)
NOTE: By default, the results(dds)* prints the results for the last level of the last term, i.e. here it was for for the interaction term SFN vs. UVB at Week 15 vs. Week 2.
Test if the effect of NOT treating with UVB vs. treating with UVB is different at Week 15 compared to Week 2:
res_int_con_uvb_week2.15 <- results(dds,
name = "time15w.trtCON",
alpha = 0.1)
res_int_con_uvb_week2.15 <- res_int_con_uvb_week2.15[order(res_int_con_uvb_week2.15$padj,
decreasing = FALSE),]
print(res_int_con_uvb_week2.15)
log2 fold change (MLE): time15w.trtCON
Wald test p-value: time15w.trtCON
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
Ces2g 1233.64052107766 1.65919128471462 0.219706072210844 7.55186813008224 4.29058805762623e-14
Chil4 729.990857182023 -11.1293956873127 1.54858690264879 -7.18680731980641 6.63239033912628e-13
Tiparp 683.339510901133 1.49955960160452 0.248262587560916 6.04021579061555 1.53908254415537e-09
Slc25a37 391.064324378349 -1.45460152768479 0.244923011988198 -5.93901534966785 2.8673902141708e-09
H2-M2 206.94506916379 -1.98012352045144 0.34512732806993 -5.73737099152641 9.61574727613009e-09
... ... ... ... ... ...
Gpm6b 6.76909966446477 -3.26887203161146 1.55193921192052 -2.10631447836558 0.0351770444831032
Tlr7 1.11233183040672 0.165798521539309 3.90101579949831 0.0425013714532075 0.966099018478313
Arhgap6 1.55558065988387 0.701543913331167 2.69733929865055 0.26008738080602 0.794796371714883
Spry3 2.92590454614356 -0.756574618876826 2.19462355565 -0.344740042969577 0.730289811494176
Zf12 0.240459283234895 1.53995508412402 7.75773006704587 0.198505886491927 0.842649279637729
padj
<numeric>
Ces2g 5.23408837149824e-10
Chil4 4.04542648735007e-09
Tiparp 6.25842265205047e-06
Slc25a37 8.74482330566741e-06
H2-M2 2.34605002043022e-05
... ...
Gpm6b NA
Tlr7 NA
Arhgap6 NA
Spry3 NA
Zf12 NA
summary(res_int_con_uvb_week2.15)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 62, 0.36%
LFC < 0 (down) : 81, 0.47%
outliers [1] : 0, 0%
low counts [2] : 5003, 29%
(mean count < 14)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
sum(res_int_con_uvb_week2.15$padj < 0.1,
na.rm = TRUE)
[1] 143
# MA plot
print(plotMA(res_int_con_uvb_week2.15,
main = "(Control vs. UVB) x (Week 15 vs. Week 2) Interaction",
alpha = 0.9))
NULL
Test if the effect of treating with UVB+SFN vs. treating with UVB is different at Week 15 compared to Week 2:
res_int_sfn_uvb_week2.15 <- results(dds,
name = "time15w.trtSFN",
alpha = 0.1)
res_int_sfn_uvb_week2.15 <- res_int_sfn_uvb_week2.15[order(res_int_sfn_uvb_week2.15$padj,
decreasing = FALSE),]
print(res_int_sfn_uvb_week2.15)
log2 fold change (MLE): time15w.trtSFN
Wald test p-value: time15w.trtSFN
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
Sprr2i 160.426257994504 2.83987384043744 0.455041717539916 6.24090875841143 4.35035967805996e-10
Jakmip2 63.5056363214658 3.21303587147397 0.539789109889979 5.95239105903574 2.64253030673362e-09
Ankrd37 235.286753079087 1.69690697674512 0.334405230598706 5.07440321345167 3.8871402035267e-07
Rabgap1l 952.654178700566 0.885094830670513 0.193655876057218 4.5704517140962 4.86674050843135e-06
Xdh 997.089593301958 1.17874278667067 0.254355414980077 4.63423507914307 3.58259699502912e-06
... ... ... ... ... ...
Tex13 0.257950623188226 -0.171416964885152 6.80505378756029 -0.0251896561344605 0.979903687548783
Trpc5os 0.25226659796839 -1.67756077298105 6.85574125799013 -0.244694294876708 0.806693145810482
Gm6568 0.246930247155146 1.31275277456817 6.92124043267287 0.189670159177119 0.849567605832819
Rs1 0.304746600897428 -2.68634965578062 5.6142429285098 -0.478488318013282 0.632302686934451
Zf12 0.240459283234895 -1.67793188223364 6.93607854502649 -0.241913621845704 0.808847094876152
padj
<numeric>
Sprr2i 7.33818670495154e-06
Jakmip2 2.22871006069914e-05
Ankrd37 0.00218560936510295
Rabgap1l 0.0139492184264559
Xdh 0.0139492184264559
... ...
Tex13 NA
Trpc5os NA
Gm6568 NA
Rs1 NA
Zf12 NA
summary(res_int_sfn_uvb_week2.15)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 11, 0.064%
LFC < 0 (down) : 2, 0.012%
outliers [1] : 0, 0%
low counts [2] : 334, 1.9%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
sum(res_int_sfn_uvb_week2.15$padj < 0.1,
na.rm = TRUE)
[1] 13
# MA plot
print(plotMA(res_int_sfn_uvb_week2.15,
main = "(UVB+SFN vs. UVB) x (Week 15 vs. Week 2) Interaction",
alpha = 0.9))
NULL
Genes with both interactions being significant:
lgene.con <- unique(res_int_con_uvb_week2.15@rownames[res_int_con_uvb_week2.15$padj < 0.1])
lgene.sfn <- unique(res_int_sfn_uvb_week2.15@rownames[res_int_sfn_uvb_week2.15$padj < 0.1])
lgene <- lgene.con[lgene.con %in% lgene.sfn]
lgene <- lgene[!is.na(lgene)]
lgene
[1] "Jakmip2" "Rabgap1l" "Alox8" "Xdh"
Plot of DESeq-normalizedcounts of genes with smallest adjusted p-value for the interaction term:
# Get the DESeq-normalize counts
dp1 <- list()
for (i in 1:length(lgene)) {
out <- plotCounts(dds,
gene = lgene[[i]],
intgroup = c("trt",
"time"),
returnData = TRUE)
dp1[[i]] <- data.table(Geneid = lgene[[i]],
Sample = rownames(out),
out)
}
dp1 <- rbindlist(dp1)
dp1$trt <- factor(dp1$trt,
levels = c("CON",
"UVB",
"SFN"))
dp1$time <- factor(dp1$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dp1$Geneid <- factor(dp1$Geneid,
levels = lgene)
dp1[, mu := mean(count,
na.rm = TRUE),
by = c("Geneid",
"trt",
"time")]
dmu <- unique(dp1[, -c("Sample",
"count")])
p1 <- ggplot(dp1,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
Compare to the plot of TPM-normalizedcounts of genes with smallest adjusted p-value for the interaction term:
# Examine TPM values for the same genes
tmp <- tpm[Geneid %in% lgene, ]
tmp$Geneid <- factor(tmp$Geneid,
levels = lgene)
tmp <- melt.data.table(data = tmp,
id.vars = 1,
measure.vars = 3:ncol(tmp),
variable.name = "Sample",
value.name = "TPM")
tmp <- merge(dmeta,
tmp,
by = "Sample")
p1 <- ggplot(tmp,
aes(x = Week,
y = TPM,
fill = Treatment,
group = Treatment)) +
facet_wrap(~ Geneid,
scales = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black")+
scale_x_discrete("")
plot(p1)
Test if the effect of NOT treating with UVB vs. treating with UVB is different at Week 25 compared to Week 2:
res_int_con_uvb_week2.25 <- results(dds,
name = "time25w.trtCON",
alpha = 0.1)
res_int_con_uvb_week2.25 <- res_int_con_uvb_week2.25[order(res_int_con_uvb_week2.25$padj,
decreasing = FALSE),]
print(res_int_con_uvb_week2.25)
log2 fold change (MLE): time25w.trtCON
Wald test p-value: time25w.trtCON
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
Zbed6 368.841303657605 3.43320752362312 0.359243339538936 9.55677432469426 1.21484098914454e-21
Sprr2a3 518.874807312777 -3.00829927788609 0.330194475297753 -9.11068931475413 8.18601891283475e-20
Ago2 322.996173044204 3.21696389905116 0.362969229893256 8.86291077620331 7.79519647652128e-19
Eda2r 195.857675971173 5.01047409106188 0.584879491697492 8.56667768691976 1.06513485992649e-17
Arhgap5 441.762889984552 2.40713754467649 0.285949703349475 8.4180452592902 3.82817902320605e-17
... ... ... ... ... ...
Bmx 0.64991670064697 -2.80585836369312 3.77968936402364 -0.74235157798951 0.457874349310709
Asb11 1.80367310297613 1.83479523174166 2.92382600120635 0.627532291930037 0.530310375641278
Tlr7 1.11233183040672 -2.63759079667608 3.68345223708522 -0.716064883404936 0.473951286070696
Arhgap6 1.55558065988387 -0.392494382167074 2.54362703703181 -0.15430500480333 0.877369251317559
Zf12 0.240459283234895 -2.48751160898273 7.69522395943148 -0.323253958831694 0.746502919247372
padj
<numeric>
Zbed6 1.76564989362268e-17
Sprr2a3 5.94877994395702e-16
Ago2 3.77651285299201e-15
Eda2r 3.87016751354289e-14
Arhgap5 1.11277507846554e-13
... ...
Bmx NA
Asb11 NA
Tlr7 NA
Arhgap6 NA
Zf12 NA
summary(res_int_con_uvb_week2.25)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1642, 9.5%
LFC < 0 (down) : 1088, 6.3%
outliers [1] : 0, 0%
low counts [2] : 2668, 16%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
sum(res_int_con_uvb_week2.25$padj < 0.1,
na.rm = TRUE)
[1] 2730
# MA plot
print(plotMA(res_int_con_uvb_week2.25,
main = "(Control vs. UVB) x (Week 25 vs. Week 2) Interaction",
alpha = 0.9))
NULL
Test if the effect of treating with UVB+SFN vs. treating with UVB is different at Week 25 compared to Week 25:
res_int_sfn_uvb_week2.25 <- results(dds,
name = "time25w.trtSFN",
alpha = 0.1)
res_int_sfn_uvb_week2.25 <- res_int_sfn_uvb_week2.25[order(res_int_sfn_uvb_week2.25$padj,
decreasing = FALSE),]
print(res_int_sfn_uvb_week2.25)
log2 fold change (MLE): time25w.trtSFN
Wald test p-value: time25w.trtSFN
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
Tesc 359.692585044172 -2.49471096338435 0.348620418829405 -7.15595194269191 8.3094251820245e-13
Nqo1 165.978588572758 -2.89528156579134 0.449655684017191 -6.4388857267968 1.20353766216316e-10
Esd 496.271393097173 -1.22049515667394 0.23113730675729 -5.28039014470108 1.28909096546055e-07
Adh7 214.546568916766 -2.65860909307642 0.505372743386177 -5.2606895165394 1.43516196027455e-07
Gpx2 60.6702898514662 -3.4552791455525 0.672790555151612 -5.13574264545651 2.81032019912661e-07
... ... ... ... ... ...
Bmx 0.64991670064697 -2.42668569800633 3.84406354185689 -0.631281369723172 0.52785656617229
Asb11 1.80367310297613 -2.26216243538786 2.0382424420646 -1.10985935171502 0.267059638858435
Tlr7 1.11233183040672 -1.12428007168118 2.94063741844227 -0.382325296083847 0.702220093752507
Arhgap6 1.55558065988387 -0.0724532171715926 2.20398487513961 -0.0328737361081042 0.973775277021798
Zf12 0.240459283234895 -4.03163875828313 6.86068257578585 -0.58764397182759 0.556771289863908
padj
<numeric>
Tesc 1.20769185595544e-08
Nqo1 8.74610819093965e-07
Esd 0.000521466098265757
Adh7 0.000521466098265757
Gpx2 0.000816903875482124
... ...
Bmx NA
Asb11 NA
Tlr7 NA
Arhgap6 NA
Zf12 NA
summary(res_int_sfn_uvb_week2.25)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1, 0.0058%
LFC < 0 (down) : 8, 0.047%
outliers [1] : 0, 0%
low counts [2] : 2668, 16%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
sum(res_int_sfn_uvb_week2.25$padj < 0.1,
na.rm = TRUE)
[1] 9
# MA plot
print(plotMA(res_int_sfn_uvb_week2.25,
main = "(UVB+SFN vs. UVB) x (Week 25 vs. Week 2) Interaction",
alpha = 0.9))
NULL
Genes with both interactions being significant:
lgene.con <- unique(res_int_con_uvb_week2.25@rownames[res_int_con_uvb_week2.25$padj < 0.1])
lgene.sfn <- unique(res_int_sfn_uvb_week2.25@rownames[res_int_sfn_uvb_week2.25$padj < 0.1])
lgene <- lgene.con[lgene.con %in% lgene.sfn]
lgene <- lgene[!is.na(lgene)]
lgene
[1] "Tesc" "Adh7"
Plot of DESeq-normalizedcounts of genes with smallest adjusted p-value for the interaction term:
# Get the DESeq-normalize counts
dp1 <- list()
for (i in 1:length(lgene)) {
out <- plotCounts(dds,
gene = lgene[[i]],
intgroup = c("trt",
"time"),
returnData = TRUE)
dp1[[i]] <- data.table(Geneid = lgene[[i]],
Sample = rownames(out),
out)
}
dp1 <- rbindlist(dp1)
dp1$trt <- factor(dp1$trt,
levels = c("CON",
"UVB",
"SFN"))
dp1$time <- factor(dp1$time,
levels = c("02w",
"15w",
"25w"),
labels = c("Week 2",
"Week 15",
"Week 25"))
dp1$Geneid <- factor(dp1$Geneid,
levels = lgene)
dp1[, mu := mean(count,
na.rm = TRUE),
by = c("Geneid",
"trt",
"time")]
dmu <- unique(dp1[, -c("Sample",
"count")])
p1 <- ggplot(dp1,
aes(x = time,
y = count,
group = trt,
fill = trt)) +
facet_wrap(~ Geneid,
scale = "free_y") +
geom_point(position = position_dodge(0.5),
shape = 21,
size = 5,
color = "black") +
geom_line(data = dmu,
aes(x = time,
y = mu,
group = trt,
colour = trt),
position = position_dodge(0.5),
alpha = 0.5,
size = 2) +
scale_x_discrete("") +
scale_y_continuous("DESeq-Normalized Counts") +
scale_fill_discrete("Treatment")
print(p1)
sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggforce_0.2.2 ggdendro_0.1-20 ggpubr_0.2 magrittr_1.5
[5] gridExtra_2.3 scales_1.0.0 threejs_0.3.1 igraph_1.2.4.1
[9] plotly_4.9.0 ggplot2_3.1.1 readxl_1.3.1 DESeq2_1.24.0
[13] SummarizedExperiment_1.14.0 DelayedArray_0.10.0 BiocParallel_1.18.0 matrixStats_0.54.0
[17] Biobase_2.44.0 GenomicRanges_1.36.0 GenomeInfoDb_1.20.0 IRanges_2.18.0
[21] S4Vectors_0.22.0 BiocGenerics_0.30.0 DT_0.6 data.table_1.12.2
[25] knitr_1.23
loaded via a namespace (and not attached):
[1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2 httr_1.4.0
[5] tools_3.6.0 backports_1.1.4 R6_2.4.0 rpart_4.1-15
[9] Hmisc_4.2-0 DBI_1.0.0 lazyeval_0.2.2 colorspace_1.4-1
[13] nnet_7.3-12 withr_2.1.2 tidyselect_0.2.5 bit_1.1-14
[17] compiler_3.6.0 htmlTable_1.13.1 labeling_0.3 checkmate_1.9.3
[21] genefilter_1.66.0 stringr_1.4.0 digest_0.6.19 foreign_0.8-71
[25] XVector_0.24.0 base64enc_0.1-3 pkgconfig_2.0.2 htmltools_0.3.6
[29] htmlwidgets_1.3 rlang_0.3.4 rstudioapi_0.10 RSQLite_2.1.1
[33] shiny_1.3.2 farver_1.1.0 jsonlite_1.6 crosstalk_1.0.0
[37] acepack_1.4.1 dplyr_0.8.1 RCurl_1.95-4.12 GenomeInfoDbData_1.2.1
[41] Formula_1.2-3 Matrix_1.2-17 Rcpp_1.0.1 munsell_0.5.0
[45] yaml_2.2.0 stringi_1.4.3 MASS_7.3-51.4 zlibbioc_1.30.0
[49] plyr_1.8.4 grid_3.6.0 blob_1.1.1 promises_1.0.1
[53] crayon_1.3.4 lattice_0.20-38 cowplot_0.9.4 splines_3.6.0
[57] annotate_1.62.0 locfit_1.5-9.1 pillar_1.4.1 geneplotter_1.62.0
[61] XML_3.98-1.19 glue_1.3.1 latticeExtra_0.6-28 tweenr_1.0.1
[65] httpuv_1.5.1 cellranger_1.1.0 polyclip_1.10-0 gtable_0.3.0
[69] purrr_0.3.2 tidyr_0.8.3 assertthat_0.2.1 xfun_0.7
[73] mime_0.6 xtable_1.8-4 later_0.8.0 survival_2.44-1.1
[77] viridisLite_0.3.0 tibble_2.1.2 AnnotationDbi_1.46.0 memoise_1.1.0
[81] cluster_2.0.9